import pandas as pd
from IPython.display import Image
import numpy as np
from matplotlib import pyplot as plt
%matplotlib inline
import seaborn as sns
import plotly
import plotly.offline as py
import plotly.graph_objs as go
import plotly.io as pio
from collections import defaultdict
import collections
import os
import psutil
import warnings
warnings.filterwarnings('ignore')
deli=pd.read_csv(r'deliveries.csv')
mat=pd.read_csv(r'matches.csv')
##same team
deli=deli.replace(to_replace="Rising Pune Supergiant",value="Rising Pune Supergiants")
mat=mat.replace(to_replace="Rising Pune Supergiant",value="Rising Pune Supergiants")
##delhi daredevils to delhi capitals
deli=deli.replace(to_replace="Delhi Daredevils",value="Delhi Capitals")
mat=mat.replace(to_replace="Delhi Daredevils",value="Delhi Capitals")
##deccan chargers to sunrisers Hyderabad
deli=deli.replace(to_replace="deccan chargers",value="sunrisers Hyderabad")
mat=mat.replace(to_replace="deccan chargers",value="sunrisers Hyderabad")
##same name of stadiums
mat=mat.replace(to_replace="MA Chidambaran Stadium",value="M. A.Chidambaram Stadium")
mat=mat.replace(to_replace="Punjab Cricket Association IS Bindra Stadium,Mohali",value="Punjab Cricket Association Stadium,Mohali")
mat=mat.replace(to_replace="Rajiv Gandhi International Stadium,Uppal",value="Rajiv Gandhi Intl.Cricket Stadium")
mat=mat.replace(to_replace="ACA-VDCA Stadium",value="Dr. Y.S. Rajeskhara reddy ACA-VDCA Stadium")
mat=mat.replace(to_replace="M. Chinnaswamy Stadium",value="M Chinnaswamy Stadium")
mat['winner']=mat['winner'].fillna("No Result")
deli['bowling_team'].unique()
array(['Royal Challengers Bangalore', 'Sunrisers Hyderabad',
'Rising Pune Supergiants', 'Mumbai Indians',
'Kolkata Knight Riders', 'Gujarat Lions', 'Kings XI Punjab',
'Delhi Capitals', 'Chennai Super Kings', 'Rajasthan Royals',
'Deccan Chargers', 'Kochi Tuskers Kerala', 'Pune Warriors'],
dtype=object)
deli['batting_team'].unique()
array(['Sunrisers Hyderabad', 'Royal Challengers Bangalore',
'Mumbai Indians', 'Rising Pune Supergiants', 'Gujarat Lions',
'Kolkata Knight Riders', 'Kings XI Punjab', 'Delhi Capitals',
'Chennai Super Kings', 'Rajasthan Royals', 'Deccan Chargers',
'Kochi Tuskers Kerala', 'Pune Warriors'], dtype=object)
Teams={'Sunrisers Hyderabad': 'SRH', 'Royal Challengers Bangalore':'RCB',
'Mumbai Indians': 'MI', 'Rising Pune Supergiants': 'RPS', 'Gujarat Lions': 'GL',
'Kolkata Knight Riders': 'KKR', 'Kings XI Punjab': 'KXIP', 'Delhi Capitals': 'DD',
'Chennai Super Kings': 'CSK', 'Rajasthan Royals': 'RR', 'Deccan Chargers':'DC',
'Kochi Tuskers Kerala': 'KTK', 'Pune Warriors': 'PW'
}
deli['batting_team']=deli['batting_team'].map(Teams)
deli['bowling_team']=deli['bowling_team'].map(Teams)
deli
| match_id | inning | batting_team | bowling_team | over | ball | batsman | non_striker | bowler | is_super_over | ... | bye_runs | legbye_runs | noball_runs | penalty_runs | batsman_runs | extra_runs | total_runs | player_dismissed | dismissal_kind | fielder | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | 1 | SRH | RCB | 1 | 1 | DA Warner | S Dhawan | TS Mills | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | NaN | NaN | NaN |
| 1 | 1 | 1 | SRH | RCB | 1 | 2 | DA Warner | S Dhawan | TS Mills | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | NaN | NaN | NaN |
| 2 | 1 | 1 | SRH | RCB | 1 | 3 | DA Warner | S Dhawan | TS Mills | 0 | ... | 0 | 0 | 0 | 0 | 4 | 0 | 4 | NaN | NaN | NaN |
| 3 | 1 | 1 | SRH | RCB | 1 | 4 | DA Warner | S Dhawan | TS Mills | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | NaN | NaN | NaN |
| 4 | 1 | 1 | SRH | RCB | 1 | 5 | DA Warner | S Dhawan | TS Mills | 0 | ... | 0 | 0 | 0 | 0 | 0 | 2 | 2 | NaN | NaN | NaN |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 179073 | 11415 | 2 | CSK | MI | 20 | 2 | RA Jadeja | SR Watson | SL Malinga | 0 | ... | 0 | 0 | 0 | 0 | 1 | 0 | 1 | NaN | NaN | NaN |
| 179074 | 11415 | 2 | CSK | MI | 20 | 3 | SR Watson | RA Jadeja | SL Malinga | 0 | ... | 0 | 0 | 0 | 0 | 2 | 0 | 2 | NaN | NaN | NaN |
| 179075 | 11415 | 2 | CSK | MI | 20 | 4 | SR Watson | RA Jadeja | SL Malinga | 0 | ... | 0 | 0 | 0 | 0 | 1 | 0 | 1 | SR Watson | run out | KH Pandya |
| 179076 | 11415 | 2 | CSK | MI | 20 | 5 | SN Thakur | RA Jadeja | SL Malinga | 0 | ... | 0 | 0 | 0 | 0 | 2 | 0 | 2 | NaN | NaN | NaN |
| 179077 | 11415 | 2 | CSK | MI | 20 | 6 | SN Thakur | RA Jadeja | SL Malinga | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | SN Thakur | lbw | NaN |
179078 rows × 21 columns
#asking user whether to save the plot on the disk or not
if(input("Save/download? y/n: ").lower()[0]=='y'):
save_files=True
print("\nplots will be downloaded/saved.")
else:
save_files=False
print("\nplots will not be downloaded/saved.")
Save/download? y/n: n plots will not be downloaded/saved.
mat['winner'].value_counts()
Mumbai Indians 109 Chennai Super Kings 100 Kolkata Knight Riders 92 Royal Challengers Bangalore 84 Kings XI Punjab 82 Delhi Capitals 77 Rajasthan Royals 75 Sunrisers Hyderabad 58 Deccan Chargers 29 Rising Pune Supergiants 15 Gujarat Lions 13 Pune Warriors 12 Kochi Tuskers Kerala 6 No Result 4 Name: winner, dtype: int64
finals=mat.drop_duplicates(subset=['season'],keep='last')
finals=finals[['id','season','city','team1','team2','toss_winner','toss_decision','winner']]
most_finals=pd.concat([finals['team1'],finals['team2']]).value_counts().reset_index()
most_finals.rename({'index':'team',0:'count'},axis=1,inplace=True)
wins=finals['winner'].value_counts().reset_index()
most_finals=most_finals.merge(wins,left_on='team',right_on='index',how='outer')
most_finals=most_finals.replace(np.NaN,0)
most_finals.drop('index',axis=1,inplace=True)
most_finals.rename({'count':'finals_played','winner':'won_count'},inplace=True,axis=1)
most_finals
| team | finals_played | won_count | |
|---|---|---|---|
| 0 | Chennai Super Kings | 8 | 3.0 |
| 1 | Mumbai Indians | 5 | 4.0 |
| 2 | Royal Challengers Bangalore | 3 | 0.0 |
| 3 | Sunrisers Hyderabad | 2 | 1.0 |
| 4 | Kolkata Knight Riders | 2 | 2.0 |
| 5 | Deccan Chargers | 1 | 1.0 |
| 6 | Kings XI Punjab | 1 | 0.0 |
| 7 | Rising Pune Supergiants | 1 | 0.0 |
| 8 | Rajasthan Royals | 1 | 1.0 |
scores=deli.groupby(['match_id','inning','batting_team','bowling_team'])['total_runs'].sum().reset_index()
scores
| match_id | inning | batting_team | bowling_team | total_runs | |
|---|---|---|---|---|---|
| 0 | 1 | 1 | SRH | RCB | 207 |
| 1 | 1 | 2 | RCB | SRH | 172 |
| 2 | 2 | 1 | MI | RPS | 184 |
| 3 | 2 | 2 | RPS | MI | 187 |
| 4 | 3 | 1 | GL | KKR | 183 |
| ... | ... | ... | ... | ... | ... |
| 1523 | 11413 | 2 | DD | SRH | 170 |
| 1524 | 11414 | 1 | DD | CSK | 155 |
| 1525 | 11414 | 2 | CSK | DD | 162 |
| 1526 | 11415 | 1 | MI | CSK | 152 |
| 1527 | 11415 | 2 | CSK | MI | 157 |
1528 rows × 5 columns
scores_1=scores[scores['total_runs']>=200]
scores_1
| match_id | inning | batting_team | bowling_team | total_runs | |
|---|---|---|---|---|---|
| 0 | 1 | 1 | SRH | RCB | 207 |
| 16 | 9 | 1 | DD | RPS | 205 |
| 38 | 20 | 1 | RCB | GL | 213 |
| 62 | 32 | 1 | SRH | KXIP | 207 |
| 72 | 36 | 1 | SRH | KKR | 209 |
| ... | ... | ... | ... | ... | ... |
| 1475 | 11326 | 2 | KKR | RCB | 219 |
| 1488 | 11333 | 1 | RCB | KXIP | 213 |
| 1498 | 11338 | 1 | KKR | MI | 241 |
| 1499 | 11338 | 2 | MI | KKR | 204 |
| 1500 | 11339 | 1 | SRH | KXIP | 232 |
105 rows × 5 columns
fig=plt.figure(figsize=(12,8))
plt.title("teams that have scores 200+",fontsize=30)
sns.countplot(x='batting_team',data=scores_1)
if save_files:
if not os.path.exists('plots'):
os.makedirs('plots')
filename='plots/teams_200s'
plt.savefig(filename,bbox_inches='tight')
matches_list=[]
total_innings=[]
batting_team=[]
runs=[]
for match_no in deli['match_id'].unique():
for innings in deli[deli['match_id']==match_no]['inning'].unique():
ad=deli[
(deli['match_id']==match_no)&
(deli['inning']==innings)
]
total_runs=ad['total_runs'].sum()
runs.append(total_runs)
matches_list.append(match_no)
total_innings.append(innings)
batting_team.append(ad['batting_team'].unique()[0])
ad0=pd.DataFrame()
ad0['match_id']=matches_list
ad0['total_runs']=runs
ad0['season']=[mat[mat['id']==i]['season'].unique()[0] for i in matches_list]
ad0['batting_team']=batting_team
ad0
| match_id | total_runs | season | batting_team | |
|---|---|---|---|---|
| 0 | 1 | 207 | 2017 | SRH |
| 1 | 1 | 172 | 2017 | RCB |
| 2 | 2 | 184 | 2017 | MI |
| 3 | 2 | 187 | 2017 | RPS |
| 4 | 3 | 183 | 2017 | GL |
| ... | ... | ... | ... | ... |
| 1523 | 11413 | 170 | 2019 | DD |
| 1524 | 11414 | 155 | 2019 | DD |
| 1525 | 11414 | 162 | 2019 | CSK |
| 1526 | 11415 | 152 | 2019 | MI |
| 1527 | 11415 | 157 | 2019 | CSK |
1528 rows × 4 columns
season=[]
teamid=[]
max_runs=[]
for year in ad0['season'].unique():
maximum_run=ad0[ad0['season']==year]['total_runs'].max()
team=ad0[(ad0['season']==year)& (ad0['total_runs']==maximum_run)]['batting_team'].unique()[0]
season.append(year)
teamid.append(team)
max_runs.append(maximum_run)
max_runs
[230, 240, 211, 246, 232, 222, 263, 231, 235, 248, 250, 241]
plt.style.use('ggplot')
fig=plt.gcf()
fig.set_size_inches(13,7.5)
plt.xticks(fontsize=15)
plt.yticks(fontsize=15)
plt.title("Highest team score in each season",fontsize=20)
ax=sns.barplot(x = season, y = max_runs, hue = teamid, dodge=False)
ax.legend(loc = 'center left', bbox_to_anchor=(1,0.7))
# save file
if save_files:
if not os.path.exists('plots'):
os.makedirs('plots')
filename = 'plots/teams_200s'
plt.savefig(filename, bbox_inches = 'tight')
matches_copy = pd.DataFrame
matches_copy = mat.copy(deep=True)
matches_copy['team1']=matches_copy['team1'].map(Teams)
matches_copy['team2']=matches_copy['team2'].map(Teams)
teams=(matches_copy['team1'].value_counts()+matches_copy['team2'].value_counts()).reset_index()
teams.columns=['team_name','Matches_played']
teams
| team_name | Matches_played | |
|---|---|---|
| 0 | CSK | 164 |
| 1 | DC | 75 |
| 2 | DD | 177 |
| 3 | GL | 30 |
| 4 | KKR | 178 |
| 5 | KTK | 14 |
| 6 | KXIP | 176 |
| 7 | MI | 187 |
| 8 | PW | 46 |
| 9 | RCB | 180 |
| 10 | RPS | 30 |
| 11 | RR | 147 |
| 12 | SRH | 108 |
matches_copy['winner']=matches_copy['winner'].map(Teams)
wins=matches_copy['winner'].value_counts().reset_index()
wins.columns=['team_name','wins']
wins
| team_name | wins | |
|---|---|---|
| 0 | MI | 109 |
| 1 | CSK | 100 |
| 2 | KKR | 92 |
| 3 | RCB | 84 |
| 4 | KXIP | 82 |
| 5 | DD | 77 |
| 6 | RR | 75 |
| 7 | SRH | 58 |
| 8 | DC | 29 |
| 9 | RPS | 15 |
| 10 | GL | 13 |
| 11 | PW | 12 |
| 12 | KTK | 6 |
player=teams.merge(wins,left_on='team_name',right_on='team_name',how='inner')
player['%win']=(player['wins']/player['Matches_played'])*100
player = player.sort_values('%win', ascending=False)
player
| team_name | Matches_played | wins | %win | |
|---|---|---|---|---|
| 0 | CSK | 164 | 100 | 60.975610 |
| 7 | MI | 187 | 109 | 58.288770 |
| 12 | SRH | 108 | 58 | 53.703704 |
| 4 | KKR | 178 | 92 | 51.685393 |
| 11 | RR | 147 | 75 | 51.020408 |
| 10 | RPS | 30 | 15 | 50.000000 |
| 9 | RCB | 180 | 84 | 46.666667 |
| 6 | KXIP | 176 | 82 | 46.590909 |
| 2 | DD | 177 | 77 | 43.502825 |
| 3 | GL | 30 | 13 | 43.333333 |
| 5 | KTK | 14 | 6 | 42.857143 |
| 1 | DC | 75 | 29 | 38.666667 |
| 8 | PW | 46 | 12 | 26.086957 |
trace1=go.Bar(x=player['team_name'], y=player['Matches_played'], name='Total Matches')
trace2=go.Bar(x=player['team_name'], y=player['wins'], name='Matches wins')
matches_data=[trace1,trace2]
fig = py.iplot(matches_data)
def team1_vs_team2(team1,team2):
mt1=mat[((mat['team1']==team1)|(mat['team2']==team1))&((mat['team1']==team2)|(mat['team2']==team2))]
plt.style.use('ggplot')
fig=plt.gcf()
fig.set_size_inches(13,7.5)
plt.xticks(fontsize=15)
plt.yticks(fontsize=15)
plt.title("ONE ON ONE CLASH",fontsize=20)
ax=sns.countplot(x = 'season', hue='winner',data=mt1, palette='YlGnBu')
ax.legend(loc = 'center left', bbox_to_anchor=(1,0.7))
# save file
if save_files:
if not os.path.exists('plots'):
os.makedirs('plots')
filename = 'plots/CSKvsMI_EverySeason'
plt.savefig(filename, bbox_inches = 'tight')
team1_vs_team2('Chennai Super Kings','Mumbai Indians')
data = matches_copy
micsk=data[np.logical_or(np.logical_and(data['team1']=='MI',data['team2']=='CSK'),np.logical_and(data['team2']=='MI',data['team1']=='CSK'))]
sns.set(style='dark')
fig=plt.gcf()
fig.set_size_inches(10,8)
sns.countplot(micsk['winner'],order=micsk['winner'].value_counts().index)
plt.text(-0.1,15,str(micsk['winner'].value_counts()['MI']),size=29,color='white')
plt.text(0.9,9,str(micsk['winner'].value_counts()['CSK']),size=29,color='white')
plt.xlabel('Winner',fontsize=15)
plt.ylabel('Count',fontsize=15)
plt.yticks(fontsize=0)
plt.title('MI vs CSK - head to head')
plt.show()
# save file
if save_files:
if not os.path.exists('plots'):
os.makedirs('plots')
filename = 'plots/CSKvsMI_HeadToHead'
fig.savefig(filename, bbox_inches='tight')
### Players with most Man of the Match awards in IPL
mom = pd.DataFrame()
mom['Awards']=mat['player_of_match'].value_counts()
mom['Player'] =mom.index
mom=mom[:20]
plt.style.use('ggplot')
fig=plt.gcf()
fig.set_size_inches(18.5,8.5)
plt.xticks(rotation=90,fontsize=15)
plt.yticks(fontsize=16)
plt.title("Most Man Of The Matches in IPL (All Seasons)",fontsize=20)
ax=sns.barplot(x='Player',y='Awards', data=mom)
count=0
for p in ax.patches:
height = p.get_height()
ax.text(p.get_x()+p.get_width()/2.,height + 0.3,mom['Awards'].iloc[count],ha="center")
count+=1
# save file
if save_files:
if not os.path.exists('plots'):
os.makedirs('plots')
filename = 'plots/most_MOM'
plt.savefig(filename, bbox_inches = 'tight')
dic=defaultdict(list)
for i in range(0,len(mat)):
dic[mat.season.iloc[i]].append(mat.player_of_match.iloc[i])
player=[]
match=[]
year=[]
for i in sorted(dic.keys()):
ctr=collections.Counter(dic[i])
d={k: v for k, v in sorted(ctr.items(), key=lambda item: item[1],reverse=True)}
player.append(list(d.keys())[0])
match.append(list(d.values())[0])
year.append(i)
plt.style.use('ggplot')
fig=plt.gcf()
fig.set_size_inches(18.5,7.5)
plt.xticks(fontsize=15)
plt.yticks(fontsize=15)
plt.title("Most Man Of The Match awards won by players each season", fontsize=20)
ax=sns.barplot(year, match)
count=0
for p in ax.patches:
height = p.get_height()
ax.text(p.get_x()+p.get_width()/2.,height + 0.15,player[count],ha="center")
count+=1
# save file
if save_files:
if not os.path.exists('plots'):
os.makedirs('plots')
filename = 'plots/most_MOM_EachSeason'
plt.savefig(filename, bbox_inches = 'tight')
### TOP RUN GETTERS IN IPL HISTORY ###
top_runGetters = pd.DataFrame(deli.groupby('batsman')['batsman_runs'].sum().sort_values(ascending=False).head(10))
top_runGetters.reset_index(inplace=True)
top_runGetters.columns=['Batsman', 'Total runs']
plt.style.use('ggplot')
fig=plt.gcf()
fig.set_size_inches(18.5,7.5)
ax=sns.barplot(x='Batsman',y='Total runs', data=top_runGetters)
plt.title("Top Run Getters in IPL", fontsize=20, fontweight = 'bold')
plt.xlabel("Batsmen", size = 25)
plt.ylabel("Total Runs Scored", size=25)
plt.xticks(fontsize=15)
plt.yticks(fontsize=15)
# save file
if save_files:
if not os.path.exists('plots'):
os.makedirs('plots')
filename = 'plots/TopRunScorers'
plt.savefig(filename, bbox_inches = 'tight')
top_runGetters.head()
| Batsman | Total runs | |
|---|---|---|
| 0 | V Kohli | 5434 |
| 1 | SK Raina | 5415 |
| 2 | RG Sharma | 4914 |
| 3 | DA Warner | 4741 |
| 4 | S Dhawan | 4632 |
balls=deli.groupby('batsman')['ball'].count().reset_index()
balls
| batsman | ball | |
|---|---|---|
| 0 | A Ashish Reddy | 196 |
| 1 | A Chandila | 7 |
| 2 | A Chopra | 75 |
| 3 | A Choudhary | 20 |
| 4 | A Dananjaya | 5 |
| ... | ... | ... |
| 511 | YV Takawale | 183 |
| 512 | Yashpal Singh | 67 |
| 513 | Younis Khan | 7 |
| 514 | Yuvraj Singh | 2207 |
| 515 | Z Khan | 141 |
516 rows × 2 columns
runs=deli.groupby('batsman')['batsman_runs'].sum().reset_index()
runs
| batsman | batsman_runs | |
|---|---|---|
| 0 | A Ashish Reddy | 280 |
| 1 | A Chandila | 4 |
| 2 | A Chopra | 53 |
| 3 | A Choudhary | 25 |
| 4 | A Dananjaya | 4 |
| ... | ... | ... |
| 511 | YV Takawale | 192 |
| 512 | Yashpal Singh | 47 |
| 513 | Younis Khan | 3 |
| 514 | Yuvraj Singh | 2765 |
| 515 | Z Khan | 117 |
516 rows × 2 columns
### FOURS HIT BY BATSMEN IN IPL SO FAR ###
four=deli[deli['batsman_runs']==4]
#four
runs_4=four.groupby('batsman')['batsman_runs'].count().reset_index()
runs_4.columns=['batsman','4s']
runs_4
| batsman | 4s | |
|---|---|---|
| 0 | A Ashish Reddy | 16 |
| 1 | A Chopra | 7 |
| 2 | A Choudhary | 1 |
| 3 | A Flintoff | 5 |
| 4 | A Hales | 13 |
| ... | ... | ... |
| 405 | YK Pathan | 264 |
| 406 | YV Takawale | 26 |
| 407 | Yashpal Singh | 5 |
| 408 | Yuvraj Singh | 218 |
| 409 | Z Khan | 11 |
410 rows × 2 columns
### SIXES HIT BY BATSMEN IN IPL SO FAR ###
six=deli[deli['batsman_runs']==6]
#six
runs_6=six.groupby('batsman')['batsman_runs'].count().reset_index()
runs_6.columns=['batsman','6s']
runs_6
| batsman | 6s | |
|---|---|---|
| 0 | A Ashish Reddy | 15 |
| 1 | A Choudhary | 1 |
| 2 | A Flintoff | 2 |
| 3 | A Hales | 6 |
| 4 | A Mishra | 5 |
| ... | ... | ... |
| 331 | Y Venugopal Rao | 37 |
| 332 | YK Pathan | 161 |
| 333 | YV Takawale | 3 |
| 334 | Yuvraj Singh | 149 |
| 335 | Z Khan | 2 |
336 rows × 2 columns
player=pd.concat([runs,balls.iloc[:,1],runs_4.iloc[:,1],runs_6.iloc[:,1]],axis=1)
player
player.fillna(0,inplace=True)
player
| batsman | batsman_runs | ball | 4s | 6s | |
|---|---|---|---|---|---|
| 0 | A Ashish Reddy | 280 | 196 | 16.0 | 15.0 |
| 1 | A Chandila | 4 | 7 | 7.0 | 1.0 |
| 2 | A Chopra | 53 | 75 | 1.0 | 2.0 |
| 3 | A Choudhary | 25 | 20 | 5.0 | 6.0 |
| 4 | A Dananjaya | 4 | 5 | 13.0 | 5.0 |
| ... | ... | ... | ... | ... | ... |
| 511 | YV Takawale | 192 | 183 | 0.0 | 0.0 |
| 512 | Yashpal Singh | 47 | 67 | 0.0 | 0.0 |
| 513 | Younis Khan | 3 | 7 | 0.0 | 0.0 |
| 514 | Yuvraj Singh | 2765 | 2207 | 0.0 | 0.0 |
| 515 | Z Khan | 117 | 141 | 0.0 | 0.0 |
516 rows × 5 columns
player['strike_rate']=(player['batsman_runs']/player['ball'])*100
player
| batsman | batsman_runs | ball | 4s | 6s | strike_rate | |
|---|---|---|---|---|---|---|
| 0 | A Ashish Reddy | 280 | 196 | 16.0 | 15.0 | 142.857143 |
| 1 | A Chandila | 4 | 7 | 7.0 | 1.0 | 57.142857 |
| 2 | A Chopra | 53 | 75 | 1.0 | 2.0 | 70.666667 |
| 3 | A Choudhary | 25 | 20 | 5.0 | 6.0 | 125.000000 |
| 4 | A Dananjaya | 4 | 5 | 13.0 | 5.0 | 80.000000 |
| ... | ... | ... | ... | ... | ... | ... |
| 511 | YV Takawale | 192 | 183 | 0.0 | 0.0 | 104.918033 |
| 512 | Yashpal Singh | 47 | 67 | 0.0 | 0.0 | 70.149254 |
| 513 | Younis Khan | 3 | 7 | 0.0 | 0.0 | 42.857143 |
| 514 | Yuvraj Singh | 2765 | 2207 | 0.0 | 0.0 | 125.283190 |
| 515 | Z Khan | 117 | 141 | 0.0 | 0.0 | 82.978723 |
516 rows × 6 columns
grp=deli.groupby(['match_id','batsman','batting_team'])['batsman_runs'].sum().reset_index()
grp
| match_id | batsman | batting_team | batsman_runs | |
|---|---|---|---|---|
| 0 | 1 | A Choudhary | RCB | 6 |
| 1 | 1 | BCJ Cutting | SRH | 16 |
| 2 | 1 | CH Gayle | RCB | 32 |
| 3 | 1 | DA Warner | SRH | 14 |
| 4 | 1 | DJ Hooda | SRH | 16 |
| ... | ... | ... | ... | ... |
| 11289 | 11415 | RD Chahar | MI | 0 |
| 11290 | 11415 | RG Sharma | MI | 15 |
| 11291 | 11415 | SK Raina | CSK | 9 |
| 11292 | 11415 | SN Thakur | CSK | 2 |
| 11293 | 11415 | SR Watson | CSK | 84 |
11294 rows × 4 columns
maximum=grp.groupby('batsman')['batsman_runs'].max().reset_index()
maximum.columns=['batsman','max_runs']
maximum
| batsman | max_runs | |
|---|---|---|
| 0 | A Ashish Reddy | 36 |
| 1 | A Chandila | 4 |
| 2 | A Chopra | 24 |
| 3 | A Choudhary | 15 |
| 4 | A Dananjaya | 4 |
| ... | ... | ... |
| 511 | YV Takawale | 45 |
| 512 | Yashpal Singh | 20 |
| 513 | Younis Khan | 3 |
| 514 | Yuvraj Singh | 83 |
| 515 | Z Khan | 23 |
516 rows × 2 columns
player2=pd.concat([player,maximum.iloc[:,1]],axis=1)
player2
player2_df = pd.DataFrame(player2)
player2.fillna(0,inplace=True)
player2
| batsman | batsman_runs | ball | 4s | 6s | strike_rate | max_runs | |
|---|---|---|---|---|---|---|---|
| 0 | A Ashish Reddy | 280 | 196 | 16.0 | 15.0 | 142.857143 | 36 |
| 1 | A Chandila | 4 | 7 | 7.0 | 1.0 | 57.142857 | 4 |
| 2 | A Chopra | 53 | 75 | 1.0 | 2.0 | 70.666667 | 24 |
| 3 | A Choudhary | 25 | 20 | 5.0 | 6.0 | 125.000000 | 15 |
| 4 | A Dananjaya | 4 | 5 | 13.0 | 5.0 | 80.000000 | 4 |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 511 | YV Takawale | 192 | 183 | 0.0 | 0.0 | 104.918033 | 45 |
| 512 | Yashpal Singh | 47 | 67 | 0.0 | 0.0 | 70.149254 | 20 |
| 513 | Younis Khan | 3 | 7 | 0.0 | 0.0 | 42.857143 | 3 |
| 514 | Yuvraj Singh | 2765 | 2207 | 0.0 | 0.0 | 125.283190 | 83 |
| 515 | Z Khan | 117 | 141 | 0.0 | 0.0 | 82.978723 | 23 |
516 rows × 7 columns
deli.groupby(['match_id','batsman','batting_team'])['batsman_runs'].sum().reset_index().sort_values(by='batsman_runs',ascending=False).head(10)
| match_id | batsman | batting_team | batsman_runs | |
|---|---|---|---|---|
| 6200 | 411 | CH Gayle | RCB | 175 |
| 900 | 60 | BB McCullum | KKR | 158 |
| 8426 | 562 | AB de Villiers | RCB | 133 |
| 10149 | 7935 | RR Pant | DD | 130 |
| 9257 | 620 | AB de Villiers | RCB | 129 |
| 5585 | 372 | CH Gayle | RCB | 128 |
| 3135 | 206 | M Vijay | CSK | 127 |
| 529 | 36 | DA Warner | SRH | 126 |
| 7752 | 516 | V Sehwag | KXIP | 122 |
| 10417 | 7953 | SR Watson | CSK | 121 |
deli['dismissal_kind'].unique()
array([nan, 'caught', 'bowled', 'run out', 'lbw', 'caught and bowled',
'stumped', 'retired hurt', 'hit wicket', 'obstructing the field'],
dtype=object)
dismissal_kinds = ['caught', 'bowled', 'lbw', 'caught and bowled', 'stumped', 'hit wicket']
bowlers = deli[deli['dismissal_kind'].isin(dismissal_kinds)]
bowlers = deli.groupby('bowler').apply(lambda x: x['dismissal_kind'].dropna().reset_index(name='wickets'))
bowlers
| index | wickets | ||
|---|---|---|---|
| bowler | |||
| A Ashish Reddy | 0 | 80497 | bowled |
| 1 | 80502 | caught | |
| 2 | 81750 | caught | |
| 3 | 82253 | caught | |
| 4 | 83084 | caught | |
| ... | ... | ... | ... |
| Z Khan | 114 | 142480 | caught |
| 115 | 143476 | bowled | |
| 116 | 144752 | caught | |
| 117 | 147288 | caught | |
| 118 | 149389 | caught |
8834 rows × 2 columns
bowlers_df = bowlers.groupby('bowler').count().reset_index()
top_bowlers= bowlers_df.sort_values(by='wickets', ascending=False)
top_bowlers_head=top_bowlers[top_bowlers.wickets>=40].head(10)
top_bowlers_head
| bowler | index | wickets | |
|---|---|---|---|
| 294 | SL Malinga | 188 | 188 |
| 84 | DJ Bravo | 168 | 168 |
| 6 | A Mishra | 165 | 165 |
| 113 | Harbhajan Singh | 161 | 161 |
| 231 | PP Chawla | 156 | 156 |
| 48 | B Kumar | 141 | 141 |
| 236 | R Ashwin | 138 | 138 |
| 300 | SP Narine | 137 | 137 |
| 330 | UT Yadav | 136 | 136 |
| 247 | R Vinay Kumar | 127 | 127 |
fig=plt.gcf()
fig.set_size_inches(15.5,5.5)
plt.xticks(fontsize=15, rotation=45)
plt.yticks(fontsize=15)
sns.barplot(top_bowlers_head['bowler'],top_bowlers['wickets'])
plt.title("Top Wicket Takers in IPL", fontsize=20, fontweight = 'bold')
plt.xlabel("Bowler", size = 25)
plt.ylabel("Wickets", size=25)
# save file
if save_files:
if not os.path.exists('plots'):
os.makedirs('plots')
filename = 'plots/TopWicketTakers'
plt.savefig(filename, bbox_inches = 'tight')
batsmen = deli.groupby('batsman').apply(lambda x:np.sum(x['batsman_runs'])).reset_index(name="runs")
top_batsmen = batsmen.sort_values(by='runs', ascending=False)
top_batsmen=top_batsmen[top_batsmen.runs>=1000]
top_batsmen.rename(columns = {'batsman': 'player'}, inplace = True)
top_batsmen.head()
| player | runs | |
|---|---|---|
| 486 | V Kohli | 5434 |
| 428 | SK Raina | 5415 |
| 367 | RG Sharma | 4914 |
| 112 | DA Warner | 4741 |
| 392 | S Dhawan | 4632 |
top_bowlers.rename(columns = {'bowler': 'player'}, inplace = True)
#Adding some more Feature Columns to deliveries Dataset
dic = dict()
for match_id in mat['id'].unique():
dic[match_id] = mat[mat['id']==match_id]['season'].values[0]
deli['season'] = [dic[i] for i in deli['match_id']]
# Selecting only the Death Overs i.e. 16-20 overs
deli = deli[deli['over']>=16]
deli.shape
(40400, 22)
mat.head(1)
| id | season | city | date | team1 | team2 | toss_winner | toss_decision | result | dl_applied | winner | win_by_runs | win_by_wickets | player_of_match | venue | umpire1 | umpire2 | umpire3 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | 2017 | Hyderabad | 2017-04-05 | Sunrisers Hyderabad | Royal Challengers Bangalore | Royal Challengers Bangalore | field | normal | 0 | Sunrisers Hyderabad | 35 | 0 | Yuvraj Singh | Rajiv Gandhi International Stadium, Uppal | AY Dandekar | NJ Llong | NaN |
deli.head(1)
| match_id | inning | batting_team | bowling_team | over | ball | batsman | non_striker | bowler | is_super_over | ... | legbye_runs | noball_runs | penalty_runs | batsman_runs | extra_runs | total_runs | player_dismissed | dismissal_kind | fielder | season | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 93 | 1 | 1 | SRH | RCB | 16 | 1 | MC Henriques | Yuvraj Singh | YS Chahal | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | NaN | NaN | NaN | 2017 |
1 rows × 22 columns
sta= pd.DataFrame()
sta['Count']=mat['venue'].value_counts()
sta['Venue'] =sta.index
sta=sta[:20]
plt.style.use('ggplot')
fig=plt.gcf()
fig.set_size_inches(18.5,10.5)
plt.ylabel("Venue", size=20)
plt.xticks(fontsize=20)
plt.yticks(fontsize=16)
plt.title("Matches Played in Different Stadiums",fontsize=20)
ax=sns.barplot(x='Count', y='Venue', data=sta)
count=0
for i, v in enumerate(sta['Count']):
ax.text(v + 1.5, i, str(v),va="center", fontdict=dict(fontsize=20))
# save file
if save_files:
if not os.path.exists('plots'):
os.makedirs('plots')
filename = 'plots/matches_In_Diff_Stadiums'
plt.savefig(filename, bbox_inches = 'tight')
df = pd.DataFrame()
df1 = pd.DataFrame()
chunks = []
chunks.append(mat['toss_winner'][mat['toss_decision']=='bat'])
chunks.append(mat['team2'][(mat['toss_decision']=='field') & (mat['toss_winner'] == mat['team1'])])
chunks.append(mat['team1'][(mat['toss_decision']=='field') & (mat['toss_winner'] == mat['team2'])])
df = pd.DataFrame(pd.concat(chunks))
df = df.sort_index()
df.columns = ['team']
indexes = df[(df['team'] == mat['winner'])&(df.index == mat.index)].index
df1['venue'] = mat['venue'].value_counts()
df1['win_count'] = mat[mat.index.isin(indexes)]['venue'].value_counts()
df1['win %'] = 100 * df1['win_count']/df1['venue']
df1 = df1.sort_values(by = ['win %'],ascending = False)
df1 = df1[:20]
winPercent = df1['win %'].astype('int')
plt.style.use('ggplot')
fig=plt.gcf()
fig.set_size_inches(18.5,10.5)
plt.xticks(fontsize=16)
plt.yticks(fontsize=16)
plt.title("Percent of matches won batting first in Different Stadiums(Top 20)",fontsize=19)
ax=sns.barplot(winPercent[:20], df1[:20].index)
plt.xlabel("Win % batting first", size=20)
count=0
for i, v in enumerate(winPercent):
ax.text(v + 1.5, i, str(v),va="center", fontdict=dict(fontsize=20))
# save file
if save_files:
if not os.path.exists('plots'):
os.makedirs('plots')
filename = 'plots/percent_BatFirstWin_StadiumWise'
plt.savefig(filename, bbox_inches = 'tight')
df1 = pd.DataFrame()
df1['venue'] = mat['venue'].value_counts()
df1['win_count'] = mat[mat.index.isin(indexes)]['venue'].value_counts()
df1['win %'] = 100 * df1['win_count']/df1['venue']
df1 = df1.sort_values(by = ['win %'])
df1['win % balling first'] = 100 - df1['win %']
df1 = df1[:20]
df1['win % balling first'] = df1['win % balling first'].astype('int')
plt.style.use('ggplot')
fig=plt.gcf()
fig.set_size_inches(18.5,10.5)
plt.xticks(fontsize=16)
plt.yticks(fontsize=16)
plt.title("Percent of matches won fielding first in Different Stadiums(Top 20)",fontsize=20)
ax=sns.barplot(df1['win % balling first'],df1.index)
plt.xlabel("Win % fielding first", size=20)
count=0
for i, v in enumerate(df1['win % balling first']):
ax.text(v + 1.5, i, str(v),va="center", fontdict=dict(fontsize=20))
# save file
if save_files:
if not os.path.exists('plots'):
os.makedirs('plots')
filename = 'plots/percent_FieldFirstWin_StadiumWise'
plt.savefig(filename, bbox_inches = 'tight')
temp_series = mat.toss_decision.value_counts()
labels = (np.array(temp_series.index))
sizes = (np.array((temp_series / temp_series.sum())*100))
colors = ['gold', 'lightskyblue']
fig = plt.figure(figsize=(5,5))
plt.pie(sizes, labels=labels, colors=colors, autopct='%1.1f%%', shadow=True, startangle=90)
plt.title("Toss decision percentage")
plt.show()
# save file
if save_files:
if not os.path.exists('plots'):
os.makedirs('plots')
filename = 'plots/Toss_Decision_Percent_pie'
fig.savefig(filename, bbox_inches = 'tight')
num_of_wins = (mat.win_by_wickets>0).sum()
num_of_loss = (mat.win_by_wickets==0).sum()
labels = ["Wins", "Loss"]
total = float(num_of_wins + num_of_loss)
sizes = [(num_of_wins/total)*100, (num_of_loss/total)*100]
colors = ['gold', 'lightskyblue']
fig = plt.figure(figsize=(5,5))
plt.pie(sizes, labels=labels, colors=colors, autopct='%1.1f%%', shadow=True, startangle=90)
plt.title("Win percentage batting second")
plt.show()
# save file
if save_files:
if not os.path.exists('plots'):
os.makedirs('plots')
filename = 'plots/WinPercent_Chasing'
fig.savefig(filename, bbox_inches = 'tight')